#!/usr/bin/env python3
"""
AIM-65 Audio Cassette Tape Encoder
===================================
Encodes data into audio WAV files compatible with the Rockwell AIM-65
microcomputer's cassette tape interface. Supports Text and Object formats.

Encoding scheme (from Appendix F of the AIM-65 User's Guide):
  - FSK at 2400 / 1200 Hz sine wave
  - Each bit = 4 half-cycles, LSB first
  - Logic "1" = 4 half-cycles of 2400 Hz
  - Logic "0" = 1 half-cycle of 2400 Hz + 3 half-cycles of 1200 Hz
  - Checksums: block checksums are little-endian (6502 convention),
    object record checksums are big-endian (MOS tape format convention)


Written by Claude Opus 4.6 Extended
Directed by J R Casey Bralla


Usage:
  Text mode:    python3 aim65_encode.py -t input.bas output.wav
  Object mode:  python3 aim65_encode.py -b input.bin -a F000 output.wav
  Object mode (hex dump or Intel HEX input):
                python3 aim65_encode.py -x dump.hex output.wav

Requires: Python 3.6+
Runs on: Gentoo Linux (or any system with Python 3)
"""

import sys
import struct
import wave
import argparse
import os
import math


# Program version and release date; both are printed in the summary banner
# that main() writes after a successful encode.
Version     = "1.03"
VersionDate = "March 15, 2026"


# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

SAMPLE_RATE = 44100        # default WAV sample rate in Hz (see -s/--samplerate)
AMPLITUDE = 32000          # ~98% of 16-bit max for strong signal
SYN = 0x16                 # Synchronous Idle character
HASH = 0x23                # '#' — AIM-65 format marker
CR = 0x0D                  # Carriage Return
LF = 0x0A                  # Line Feed
EOF_MARKER = 0x1A          # End-of-file marker (Ctrl-Z)
SEMICOLON = 0x3B           # ';' — object record start

BLOCK_DATA_SIZE = 79       # data bytes per block (spec: Appendix F, page F-3)
MAX_RECORD_DATA = 24       # max data bytes per object record (0x18)
SYN_COUNT = 32             # SYN characters per block header

LEADER_SECONDS = 3.0       # 2400 Hz leader tone before first block
GAP_SECONDS = 2.5          # 2400 Hz tone between blocks (text mode needs time)
# NOTE(review): TRAILER_SECONDS is not referenced by any code in this file;
# encode_file() hard-codes a 0.5 s trailer tone followed by 1.0 s of silence.
TRAILER_SECONDS = 0.5      # nominal trailer duration in seconds


# ---------------------------------------------------------------------------
# Audio generation
# ---------------------------------------------------------------------------

def make_half_cycle(sample_rate, frequency, positive):
    """Return the 16-bit samples of a single sine half-cycle.

    The half-cycle spans ``round(sample_rate / (2 * frequency))`` samples,
    starting at zero, rising to the peak and stopping one sample short of
    the next zero crossing. Each sample is scaled by AMPLITUDE and
    truncated to an int.

    sample_rate -- samples per second
    frequency   -- tone frequency in Hz
    positive    -- truthy for a positive-going lobe, falsy for a negative one

    Per the original author's notes, a half-sine is used instead of a
    square wave because the square wave's sharp edges and harmonics cause
    false zero-crossings in the AIM-65 cassette hardware."""
    count = round(sample_rate / (2 * frequency))
    # Phase runs over [0, pi) so consecutive half-cycles join at zero.
    lobe = (math.sin(math.pi * (step / count)) for step in range(count))
    return [int((level if positive else -level) * AMPLITUDE) for level in lobe]


def make_tone(seconds, sample_rate, frequency=2400):
    """Return a steady tone built from alternating sine half-cycles.

    The tone starts with a positive half-cycle. The number of half-cycles
    is the number that fits in `seconds`, rounded UP to a multiple of 4,
    so the result can be slightly longer than requested. A bit is 4
    half-cycles, so this keeps bit framing aligned across the tone.

    seconds     -- requested duration
    sample_rate -- samples per second
    frequency   -- tone frequency in Hz (default 2400)

    encode_file() uses this for the leader, the inter-block gaps and the
    trailer tone."""
    half_len = round(sample_rate / (2 * frequency))
    wanted = int(seconds * sample_rate)
    # Groups of four half-cycles, rounding the half-cycle count upwards.
    groups = (wanted // half_len + 3) // 4

    # One full cycle (+ lobe then - lobe); four half-cycles = two cycles.
    cycle = (make_half_cycle(sample_rate, frequency, True)
             + make_half_cycle(sample_rate, frequency, False))
    return cycle * (2 * groups)


def encode_bit(bit_value, sample_rate):
    """Return the audio samples for a single bit.

    Every bit is four half-cycles with polarity +, -, +, -:
      truthy bit ("1"): four half-cycles of 2400 Hz
      falsy bit  ("0"): one half-cycle of 2400 Hz, then three of 1200 Hz
    """
    if bit_value:
        plan = ((2400, True), (2400, False), (2400, True), (2400, False))
    else:
        plan = ((2400, True), (1200, False), (1200, True), (1200, False))

    out = []
    for freq, rising in plan:
        out += make_half_cycle(sample_rate, freq, rising)
    return out


def encode_byte(byte_val, sample_rate):
    """Return the audio samples for the low 8 bits of `byte_val`.

    Bits are emitted least-significant first."""
    out = []
    shifting = byte_val
    for _ in range(8):
        out += encode_bit(shifting & 1, sample_rate)
        shifting >>= 1  # expose the next-higher bit
    return out


def encode_bytes(data, sample_rate):
    """Return the concatenated audio samples for every byte in `data`,
    in order."""
    return [
        sample
        for value in data
        for sample in encode_byte(value, sample_rate)
    ]


def make_silence(seconds, sample_rate):
    """Return `int(seconds * sample_rate)` zero-valued samples.

    A non-positive duration yields an empty list."""
    length = int(seconds * sample_rate)
    return [0 for _ in range(length)]


# ---------------------------------------------------------------------------
# Block assembly
# ---------------------------------------------------------------------------

def build_block(blk_num, data_79):
    """Assemble the byte sequence of one tape block.

    Layout: SYN_COUNT x SYN, '#', block number, 79 data bytes, then a
    2-byte checksum.

    The checksum is the 16-bit sum of the block-number byte and the 79
    data bytes, stored low byte first. (Object RECORD checksums, built
    elsewhere in this file, are stored high byte first.)

    blk_num -- block number; only its low 8 bits are written
    data_79 -- exactly 79 data bytes (asserted)

    Returns a list of ints."""
    assert len(data_79) == 79, f"Block data must be 79 bytes, got {len(data_79)}"

    blk_byte = blk_num & 0xFF
    total = (blk_byte + sum(data_79)) & 0xFFFF

    header = [SYN] * SYN_COUNT + [HASH, blk_byte]
    trailer = [total & 0xFF, (total >> 8) & 0xFF]  # little-endian checksum
    return header + list(data_79) + trailer


def encode_block_audio(block_bytes, sample_rate):
    """Return the audio samples for one fully assembled block.

    This is a thin wrapper over encode_bytes()."""
    block_samples = encode_bytes(block_bytes, sample_rate)
    return block_samples


# ---------------------------------------------------------------------------
# Object format: build data records and blocks
# ---------------------------------------------------------------------------

def build_object_record(address, data):
    """Assemble one object data record.

    Layout: ';' count addr_hi addr_lo data... chksum_hi chksum_lo CR

    The checksum is the 16-bit sum of the count, both address bytes and
    every data byte. It is stored high byte first, the same order as the
    address.

    address -- load address; only its low 16 bits are written
    data    -- 1..MAX_RECORD_DATA data bytes (asserted)

    Returns a list of ints."""
    count = len(data)
    assert 1 <= count <= MAX_RECORD_DATA

    payload = list(data)
    summed = [count, (address >> 8) & 0xFF, address & 0xFF]
    checksum = sum(summed + payload) & 0xFFFF

    return [
        SEMICOLON,
        *summed,
        *payload,
        (checksum >> 8) & 0xFF,  # big-endian checksum
        checksum & 0xFF,
        CR,
    ]


def build_last_record(record_count):
    """Assemble the end-of-file record.

    Layout: ';' 00 count_hi count_lo chksum_hi chksum_lo CR

    The checksum is the 16-bit sum of the two count bytes, stored high
    byte first. Only the low 16 bits of `record_count` are written.

    Returns a list of ints."""
    hi = (record_count >> 8) & 0xFF
    lo = record_count & 0xFF
    total = (hi + lo) & 0xFFFF

    record = [SEMICOLON, 0x00, hi, lo]
    record += [(total >> 8) & 0xFF, total & 0xFF, CR]
    return record


def build_object_data_stream(binary_data, start_address):
    """Turn binary data and a load address into a flat list of record bytes.

    The data is cut into records of at most MAX_RECORD_DATA bytes, each
    addressed at `start_address` plus its offset. A final end-of-file
    record follows; the count it carries is the number of data records
    plus one (the end record itself)."""
    stream = []
    n_records = 0
    for offset in range(0, len(binary_data), MAX_RECORD_DATA):
        piece = binary_data[offset:offset + MAX_RECORD_DATA]
        stream += build_object_record(start_address + offset, piece)
        n_records += 1

    stream += build_last_record(n_records + 1)
    return stream


def build_object_blocks(file_name, binary_data, start_address):
    """Build every tape block of an object file.

    Block payloads (79 bytes each, block numbers counting up from 0):
      first block: 5-char file name + CR + first 73 bytes of record stream
      later blocks: the next 79 bytes of record stream
    A payload that comes up short is zero-filled to 79 bytes.

    The file name is space-padded or truncated to 5 characters and each
    character is masked to 7 bits.

    Returns a list of block byte-sequences as produced by build_block()."""
    header = [ord(ch) & 0x7F for ch in file_name.ljust(5)[:5]]
    header.append(CR)

    stream = build_object_data_stream(binary_data, start_address)

    # Room left for record bytes after the name + CR header (73 bytes).
    first_room = BLOCK_DATA_SIZE - len(header)
    payloads = [header + stream[:first_room]]
    for start in range(first_room, len(stream), BLOCK_DATA_SIZE):
        payloads.append(stream[start:start + BLOCK_DATA_SIZE])

    blocks = []
    for number, payload in enumerate(payloads):
        padded = payload + [0] * (BLOCK_DATA_SIZE - len(payload))
        blocks.append(build_block(number, padded))
    return blocks


# ---------------------------------------------------------------------------
# Text format: build blocks
# ---------------------------------------------------------------------------

def build_text_blocks(file_name, text_data):
    """Build every tape block of a text file.

    `text_data` is expected to be the byte values produced by
    read_text_input() (leading spaces, CR LF line ends, 0x1A marker).

    Block payloads (79 bytes each, block numbers counting up from 0):
      first block: 5-char file name + first 74 bytes of text
      later blocks: the next 79 bytes of text
    A payload that comes up short is zero-filled to 79 bytes.

    The file name is space-padded or truncated to 5 characters and each
    character is masked to 7 bits.

    Returns a list of block byte-sequences as produced by build_block()."""
    tape_name = [ord(ch) & 0x7F for ch in file_name.ljust(5)[:5]]
    remaining = list(text_data)

    head_room = BLOCK_DATA_SIZE - 5  # 74 text bytes fit beside the name
    payloads = [tape_name + remaining[:head_room]]

    cursor = min(head_room, len(remaining))
    while cursor < len(remaining):
        payloads.append(remaining[cursor:cursor + BLOCK_DATA_SIZE])
        cursor += BLOCK_DATA_SIZE

    return [
        build_block(index, payload + [0] * (BLOCK_DATA_SIZE - len(payload)))
        for index, payload in enumerate(payloads)
    ]


# ---------------------------------------------------------------------------
# Input file reading
# ---------------------------------------------------------------------------

def read_text_input(filename):
    """Read a text file and return it as AIM-65 editor-format byte values.

    Conversion rules, as implemented here:
      - CRLF and bare CR line endings are treated as LF
      - trailing newlines at the end of the file are dropped
      - empty lines are skipped entirely
      - each remaining line becomes: space (0x20), the line's characters
        masked to 7 bits, then CR LF
      - the stream ends with CR LF 0x1A, so after the last line the tail
        reads CR LF CR LF 0x1A

    Returns a list of ints."""
    with open(filename, 'r') as f:
        raw = f.read()

    normalized = raw.replace('\r\n', '\n').replace('\r', '\n').rstrip('\n')

    out = []
    for row in normalized.split('\n'):
        if row:
            out.append(0x20)  # leading space (AIM-65 editor format)
            out.extend(ord(ch) & 0x7F for ch in row)
            out += [CR, LF]

    # End-of-text: blank CR LF line followed by the EOF marker.
    out += [CR, LF, EOF_MARKER]
    return out


def read_binary_input(filename):
    """Return the entire contents of `filename` as a list of byte values
    (ints 0-255)."""
    with open(filename, 'rb') as handle:
        contents = handle.read()
    return list(contents)


def read_hex_input(filename):
    """Read a hex dump file.

    Supported formats:
      1) Address: hex bytes    (e.g., "F000: 48 20 9E EB ...")
      2) Plain hex bytes       (e.g., "48 20 9E EB ...")
      3) Intel HEX format      (lines starting with ':')

    Intel HEX is recognised from the first NON-BLANK line, so leading
    empty lines do not defeat detection; such files are handed to
    read_intel_hex().

    For formats 1 and 2:
      - blank lines and lines starting with '#' or ';' are ignored
      - anything from the first '|' onward (an ASCII column) is dropped
      - an "ADDR:" prefix is optional and may carry a leading '$'; only
        the first address seen is used, so data from all lines is simply
        concatenated in file order
      - only two-character tokens that parse as hex are kept (a trailing
        or leading ',' is tolerated); every other token is ignored

    Returns (start_address, data_bytes); start_address is 0 when the file
    contains no address prefix."""
    with open(filename, 'r') as f:
        lines = f.readlines()

    # Detect Intel HEX from the first line that has any content.
    first_content = next((ln.strip() for ln in lines if ln.strip()), '')
    if first_content.startswith(':'):
        return read_intel_hex(lines)

    all_data = []
    first_address = None

    for line in lines:
        line = line.strip()
        if not line or line.startswith(('#', ';')):
            continue

        # Strip the ASCII column (everything from '|' onward)
        if '|' in line:
            line = line[:line.index('|')].strip()

        # Check for "ADDR: hex hex hex..." format
        hex_str = line
        if ':' in line:
            addr_text, _, rest = line.partition(':')
            try:
                addr = int(addr_text.strip().lstrip('$'), 16)
            except ValueError:
                # Not an address prefix: scan the whole line for bytes.
                pass
            else:
                if first_address is None:
                    first_address = addr
                hex_str = rest.strip()

        # Parse hex bytes
        for tok in hex_str.split():
            tok = tok.strip(',').strip()
            if len(tok) == 2:
                try:
                    all_data.append(int(tok, 16))
                except ValueError:
                    # Deliberately best-effort: non-hex tokens are skipped.
                    pass

    if first_address is None:
        first_address = 0
    return first_address, all_data


def read_intel_hex(lines):
    """Parse Intel HEX text lines into one contiguous memory image.

    Only lines starting with ':' are parsed, and only type-00 (data)
    records contribute. Record checksums are not verified. The 16-bit
    address field of each record is used as-is.

    The image starts at the lowest record address and ends after the
    highest byte loaded; gaps between records are filled with 0xFF.
    Where records overlap, the one with the higher address (or the later
    one in the file, for equal addresses) wins.

    Returns (start_address, data_bytes), or (0, []) when there are no
    data records."""
    data_records = []
    for text in lines:
        text = text.strip()
        if not text.startswith(':'):
            continue
        raw = bytes.fromhex(text[1:])
        length = raw[0]
        load_addr = (raw[1] << 8) | raw[2]
        if raw[3] == 0:  # record type 00 = data
            data_records.append((load_addr, list(raw[4:4 + length])))

    if not data_records:
        return 0, []

    data_records.sort(key=lambda rec: rec[0])
    base = data_records[0][0]
    top = max(addr + len(chunk) for addr, chunk in data_records)

    # Start from an all-0xFF image and overlay each record in address order.
    image = [0xFF] * (top - base)
    for addr, chunk in data_records:
        begin = addr - base
        image[begin:begin + len(chunk)] = chunk

    return base, image


# ---------------------------------------------------------------------------
# WAV file output
# ---------------------------------------------------------------------------

def write_wav(filename, samples, sample_rate):
    """Write samples to a 16-bit mono WAV file.

    filename    -- path of the WAV file to create or overwrite
    samples     -- iterable of numeric sample values; each is converted
                   with int() and clamped to the signed 16-bit range
    sample_rate -- frame rate written to the WAV header

    The frame data is assembled with a single join (linear time) before
    the file is opened, and the file is written inside a `with` block so
    the handle is closed even if writing fails."""
    pack_sample = struct.Struct('<h').pack  # little-endian signed 16-bit
    frames = b''.join(
        pack_sample(max(-32768, min(32767, int(s)))) for s in samples
    )

    with wave.open(filename, 'wb') as w:
        w.setnchannels(1)
        w.setsampwidth(2)
        w.setframerate(sample_rate)
        w.writeframes(frames)


# ---------------------------------------------------------------------------
# Main encoder
# ---------------------------------------------------------------------------

def encode_file(blocks, sample_rate):
    """Encode a list of blocks into one list of audio samples.

    blocks      -- block byte-sequences (as produced by build_block())
    sample_rate -- samples per second

    Layout of the returned signal:
      0.5 s silence            (time to press play)
      leader tone              (LEADER_SECONDS of 2400 Hz; amplitude ramps
                                linearly from 0 over the first 0.5 s)
      block 0, tone, block 1, tone, ... last block
                               (GAP_SECONDS of 2400 Hz between blocks)
      0.5 s trailer tone
      1.0 s silence

    Between the start of the leader and the end of the trailer the signal
    is continuous: blocks are separated by carrier tone, never by
    silence. Original author's rationale: over a direct audio cable (no
    tape hiss) a stretch of true zero signal makes the AIM-65 input lose
    lock, and the 2400 Hz carrier reads as a run of 1 bits until the SYN
    characters at the start of the next block provide byte sync."""
    all_samples = []

    # Initial silence (lets user press play before signal starts)
    all_samples.extend(make_silence(0.5, sample_rate))

    # Leader tone: 2400 Hz carrier with a linear amplitude ramp-up
    leader_samples = make_tone(LEADER_SECONDS, sample_rate, 2400)
    ramp_samples = int(0.5 * sample_rate)
    for i in range(min(ramp_samples, len(leader_samples))):
        leader_samples[i] = int(leader_samples[i] * (i / ramp_samples))
    all_samples.extend(leader_samples)

    # Blocks with continuous tone between them (no silence)
    last_index = len(blocks) - 1
    for i, block in enumerate(blocks):
        all_samples.extend(encode_block_audio(block, sample_rate))

        # Inter-block carrier tone; omitted after the final block
        if i < last_index:
            all_samples.extend(make_tone(GAP_SECONDS, sample_rate, 2400))

    # Trailer tone then silence
    all_samples.extend(make_tone(0.5, sample_rate, 2400))
    all_samples.extend(make_silence(1.0, sample_rate))

    return all_samples


def main():
    """Command-line entry point.

    Parses the arguments, reads the input in the selected mode (text,
    raw binary, or hex dump / Intel HEX), builds the tape blocks, encodes
    them to audio, writes the WAV file and prints a summary.

    Usage errors are reported through argparse (`parser.error`, which
    exits with status 2):
      - -b/--binary without -a/--address
      - a start address that is not valid hex (a leading '$' is allowed)
      - a start address outside 0000-FFFF (the record format stores only
        16 address bits, so larger values would be silently truncated)
      - a sample rate too low to give at least two samples per 2400 Hz
        half-cycle (lower rates yield an all-zero or undefined carrier)
    """
    parser = argparse.ArgumentParser(
        description="AIM-65 Audio Cassette Tape Encoder\n\n"
                    "Encodes data into WAV files compatible with the\n"
                    "Rockwell AIM-65 cassette tape interface.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="Examples:\n"
               "  %(prog)s -t program.bas output.wav\n"
               "  %(prog)s -b firmware.bin -a F000 output.wav\n"
               "  %(prog)s -x dump.hex output.wav\n")

    parser.add_argument('input', help='Input file to encode')
    parser.add_argument('output', help='Output WAV file')

    mode = parser.add_mutually_exclusive_group(required=True)
    mode.add_argument('-t', '--text', action='store_true',
                      help='Text mode: input is a plain text file '
                           '(BASIC program, assembler source, etc.)')
    mode.add_argument('-b', '--binary', action='store_true',
                      help='Object mode: input is a raw binary file '
                           '(requires -a for start address)')
    mode.add_argument('-x', '--hex', action='store_true',
                      help='Object mode: input is a hex dump or Intel HEX file')

    parser.add_argument('-a', '--address', metavar='ADDR',
                        help='Start address in hex for binary object mode '
                             '(e.g., F000)')
    parser.add_argument('-n', '--name', metavar='NAME',
                        help='File name stored on tape (1-5 chars, '
                             'default: derived from input filename)')
    parser.add_argument('-s', '--samplerate', type=int, default=SAMPLE_RATE,
                        help=f'WAV sample rate (default: {SAMPLE_RATE})')

    args = parser.parse_args()

    # Validate arguments
    if args.binary and not args.address:
        parser.error("-b/--binary requires -a/--address (start address in hex)")

    # Parse the address up front so a typo is a usage error, not a traceback.
    # Text mode never uses the address, so it is left unchecked there.
    address_override = None
    if args.address and not args.text:
        try:
            address_override = int(args.address.strip().lstrip('$'), 16)
        except ValueError:
            parser.error(f"invalid hex start address: {args.address!r}")

    sample_rate = args.samplerate
    # The 2400 Hz half-cycle needs at least two samples to be non-zero.
    if sample_rate <= 0 or round(sample_rate / (2 * 2400)) < 2:
        parser.error(f"sample rate {sample_rate} Hz is too low to render "
                     "the 2400 Hz carrier")

    # Derive file name if not specified
    if args.name:
        file_name = args.name
    else:
        base = os.path.splitext(os.path.basename(args.input))[0]
        file_name = base.upper()[:5]
    file_name = file_name.ljust(5)[:5]

    # Read input and build blocks
    start_address = None
    if args.text:
        text_data = read_text_input(args.input)
        blocks = build_text_blocks(file_name, text_data)
        file_type = "Text"
        data_length = len(text_data)
    else:
        if args.binary:
            start_address = address_override
            binary_data = read_binary_input(args.input)
        else:
            # Hex mode: the file may supply the address; -a overrides it.
            start_address, binary_data = read_hex_input(args.input)
            if address_override is not None:
                start_address = address_override
        if not 0 <= start_address <= 0xFFFF:
            parser.error(f"start address ${start_address:X} is outside "
                         "the 16-bit range 0000-FFFF")
        blocks = build_object_blocks(file_name, binary_data, start_address)
        file_type = "Object"
        data_length = len(binary_data)

    # Encode to audio
    samples = encode_file(blocks, sample_rate)
    duration = len(samples) / sample_rate

    # Write WAV file
    write_wav(args.output, samples, sample_rate)

    # Summary
    print()
    print("=" * 60)
    print("  Rockwell AIM-65 Audio Cassette Tape Encoder")
    print("  Version " + Version + "  " + VersionDate)
    print("=" * 60)
    print()
    print(f"  Input:      {args.input}")
    print(f"  Output:     {args.output}")
    print(f"  File Name:  \"{file_name}\"")
    print(f"  File Type:  {file_type}")
    print(f"  Data:       {data_length} bytes")
    if file_type == "Object":
        if data_length:
            end_address = start_address + data_length - 1
            print(f"  Address:    ${start_address:04X} - ${end_address:04X}")
        else:
            # No data bytes: an end address of start-1 would be misleading.
            print(f"  Address:    ${start_address:04X} (no data)")
    print(f"  Blocks:     {len(blocks)}")
    print(f"  WAV:        {sample_rate} Hz, 16-bit mono, {duration:.2f}s")
    print()
    print("=" * 60)
    print()


# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()